#!/usr/bin/env perl
#
# ensembl-fetch -- fetch fasta, txinfo, and seqinfo from ensembl
# You'll need to install the ensembl core api (at least) to use this
# code. You'll also need Config::IniFiles and Log::Log4perl installed.
#
# The ensembl api uses deprecated perl features. I think 5.16 is the
# most recent that can be used.
#
# $ perlbrew use perl-5.16.3
# $ perlbrew install-cpanm
# $ cpanm Config::IniFiles Log::Log4perl DBI DBD::mysql
#
# Tips: 
# * rsync --delete-excluded --exclude=.git -HRavz opt/ensembl/82 projects/biocommons/uta/{sbin,loading/Makefile,loading/etc}/ minion:
# * See http://uswest.ensembl.org/info/docs/api/api_git.html
# for ensembl api installation instructions.

use strict;
use warnings;

use Config::IniFiles;
use Data::Dumper;
use File::Path qw(remove_tree);
use Getopt::Long qw(:config gnu_compat);
use IO::Compress::Gzip;
use IO::File;
use IO::Zlib;
use Log::Log4perl;

use Bio::EnsEMBL::ApiVersion;
use Bio::EnsEMBL::Registry;

use FindBin;

# Forward declarations. The main program below calls some of these subs
# before their definitions appear in the file; declaring the prototypes here
# lets perl apply them to those calls (otherwise it warns that the sub was
# "called too early to check prototype").
sub process1($$$$$);
sub process_subgenes($$);
sub process_genes($@);
sub fetch_Gene_by_name($$);

############################################################################

# Parent of the directory that holds this script ($FindBin::RealBin is the
# script directory with links resolved).
my $root = join('/', $FindBin::RealBin, '..');

# Default settings. GetOptions() further down stores command-line values
# into this same hash, replacing the defaults given here.
my $opts = {
    'host'         => 'ensembldb.ensembl.org',
    'port'         => 5306,    # 3337 is GRCh37; 5306 is GRCh38 (>=e76)
    'user'         => 'anonymous',
    'pass'         => undef,
    'npartitions'  => 1000,
    'divisor'      => undef,
    'modulus'      => undef,
    'primary-only' => 0,
};

# Accession -> sequence-name lookup. Keys are versioned accessions with
# NC_/NT_/NW_ prefixes; values are names such as '1', 'X',
# '10_GL383545V1_ALT', 'UN_GL000195V1' or 'HG2095_PATCH'.
#
# NOTE(review): many accessions are listed more than once with different
# names, e.g. 'NC_000023.11' => '23' and later => 'X', or 'NT_187515.1' =>
# '1_KI270762V1_ALT' and later => 'HSCHR1_1_CTG3'. When a list is assigned to
# a hash the last pair for a key wins, so the earlier name of each repeated
# accession is silently discarded -- confirm that this is intended.
my %ac_to_name = (
	'NC_000001.11' => '1',
	'NC_000002.12' => '2',
	'NC_000003.12' => '3',
	'NC_000004.12' => '4',
	'NC_000005.10' => '5',
	'NC_000006.12' => '6',
	'NC_000007.14' => '7',
	'NC_000008.11' => '8',
	'NC_000009.12' => '9',
	'NC_000010.11' => '10',
	'NC_000011.10' => '11',
	'NC_000012.12' => '12',
	'NC_000013.11' => '13',
	'NC_000014.9' => '14',
	'NC_000015.10' => '15',
	'NC_000016.10' => '16',
	'NC_000017.11' => '17',
	'NC_000018.10' => '18',
	'NC_000019.10' => '19',
	'NC_000020.11' => '20',
	'NC_000021.9' => '21',
	'NC_000022.11' => '22',
	'NC_000023.11' => '23',
	'NC_000024.10' => '24',
	'NC_000023.11' => 'X',
	'NC_000024.10' => 'Y',
	'NC_012920.1' => 'M',
	'NW_003315934.1' => '10_GL383545V1_ALT',
	'NW_003315935.1' => '10_GL383546V1_ALT',
	'NT_187579.1' => '10_KI270824V1_ALT',
	'NT_187580.1' => '10_KI270825V1_ALT',
	'NW_003315936.1' => '11_GL383547V1_ALT',
	'NW_003871073.1' => '11_JH159136V1_ALT',
	'NW_003871074.1' => '11_JH159137V1_ALT',
	'NT_187376.1' => '11_KI270721V1_RANDOM',
	'NT_187581.1' => '11_KI270826V1_ALT',
	'NT_187582.1' => '11_KI270827V1_ALT',
	'NT_187583.1' => '11_KI270829V1_ALT',
	'NT_187584.1' => '11_KI270830V1_ALT',
	'NT_187585.1' => '11_KI270831V1_ALT',
	'NT_187586.1' => '11_KI270832V1_ALT',
	'NT_187656.1' => '11_KI270902V1_ALT',
	'NT_187657.1' => '11_KI270903V1_ALT',
	'NT_187681.1' => '11_KI270927V1_ALT',
	'NW_003315938.1' => '12_GL383549V1_ALT',
	'NW_003315939.2' => '12_GL383550V2_ALT',
	'NW_003315940.1' => '12_GL383551V1_ALT',
	'NW_003315941.1' => '12_GL383552V1_ALT',
	'NW_003315942.2' => '12_GL383553V2_ALT',
	'NW_003571049.1' => '12_GL877875V1_ALT',
	'NW_003571050.1' => '12_GL877876V1_ALT',
	'NT_187589.1' => '12_KI270833V1_ALT',
	'NT_187590.1' => '12_KI270834V1_ALT',
	'NT_187587.1' => '12_KI270835V1_ALT',
	'NT_187591.1' => '12_KI270836V1_ALT',
	'NT_187588.1' => '12_KI270837V1_ALT',
	'NT_187658.1' => '12_KI270904V1_ALT',
	'NT_187592.1' => '13_KI270838V1_ALT',
	'NT_187593.1' => '13_KI270839V1_ALT',
	'NT_187594.1' => '13_KI270840V1_ALT',
	'NT_187595.1' => '13_KI270841V1_ALT',
	'NT_187596.1' => '13_KI270842V1_ALT',
	'NT_187597.1' => '13_KI270843V1_ALT',
	'NT_113796.3' => '14_GL000009V2_RANDOM',
	'NT_113888.1' => '14_GL000194V1_RANDOM',
	'NT_167219.1' => '14_GL000225V1_RANDOM',
	'NT_187377.1' => '14_KI270722V1_RANDOM',
	'NT_187378.1' => '14_KI270723V1_RANDOM',
	'NT_187379.1' => '14_KI270724V1_RANDOM',
	'NT_187380.1' => '14_KI270725V1_RANDOM',
	'NT_187381.1' => '14_KI270726V1_RANDOM',
	'NT_187598.1' => '14_KI270844V1_ALT',
	'NT_187599.1' => '14_KI270845V1_ALT',
	'NT_187600.1' => '14_KI270846V1_ALT',
	'NT_187601.1' => '14_KI270847V1_ALT',
	'NW_003315943.1' => '15_GL383554V1_ALT',
	'NW_003315944.2' => '15_GL383555V2_ALT',
	'NT_187382.1' => '15_KI270727V1_RANDOM',
	'NT_187603.1' => '15_KI270848V1_ALT',
	'NT_187605.1' => '15_KI270849V1_ALT',
	'NT_187606.1' => '15_KI270850V1_ALT',
	'NT_187604.1' => '15_KI270851V1_ALT',
	'NT_187602.1' => '15_KI270852V1_ALT',
	'NT_187660.1' => '15_KI270905V1_ALT',
	'NT_187659.1' => '15_KI270906V1_ALT',
	'NW_003315945.1' => '16_GL383556V1_ALT',
	'NW_003315946.1' => '16_GL383557V1_ALT',
	'NT_187383.1' => '16_KI270728V1_RANDOM',
	'NT_187607.1' => '16_KI270853V1_ALT',
	'NT_187610.1' => '16_KI270854V1_ALT',
	'NT_187608.1' => '16_KI270855V1_ALT',
	'NT_187609.1' => '16_KI270856V1_ALT',
	'NT_113930.2' => '17_GL000205V2_RANDOM',
	'NT_167251.2' => '17_GL000258V2_ALT',
	'NW_003315952.3' => '17_GL383563V3_ALT',
	'NW_003315953.2' => '17_GL383564V2_ALT',
	'NW_003315954.1' => '17_GL383565V1_ALT',
	'NW_003315955.1' => '17_GL383566V1_ALT',
	'NW_003871091.1' => '17_JH159146V1_ALT',
	'NW_003871092.1' => '17_JH159147V1_ALT',
	'NW_003871093.1' => '17_JH159148V1_ALT',
	'NT_187384.1' => '17_KI270729V1_RANDOM',
	'NT_187385.1' => '17_KI270730V1_RANDOM',
	'NT_187614.1' => '17_KI270857V1_ALT',
	'NT_187615.1' => '17_KI270858V1_ALT',
	'NT_187616.1' => '17_KI270859V1_ALT',
	'NT_187612.1' => '17_KI270860V1_ALT',
	'NT_187611.1' => '17_KI270861V1_ALT',
	'NT_187613.1' => '17_KI270862V1_ALT',
	'NT_187662.1' => '17_KI270907V1_ALT',
	'NT_187663.1' => '17_KI270908V1_ALT',
	'NT_187661.1' => '17_KI270909V1_ALT',
	'NT_187664.1' => '17_KI270910V1_ALT',
	'NW_003315956.1' => '18_GL383567V1_ALT',
	'NW_003315957.1' => '18_GL383568V1_ALT',
	'NW_003315958.1' => '18_GL383569V1_ALT',
	'NW_003315959.1' => '18_GL383570V1_ALT',
	'NW_003315960.1' => '18_GL383571V1_ALT',
	'NW_003315961.1' => '18_GL383572V1_ALT',
	'NT_187617.1' => '18_KI270863V1_ALT',
	'NT_187618.1' => '18_KI270864V1_ALT',
	'NT_187666.1' => '18_KI270911V1_ALT',
	'NT_187665.1' => '18_KI270912V1_ALT',
	'NT_113949.2' => '19_GL000209V2_ALT',
	'NW_003315962.1' => '19_GL383573V1_ALT',
	'NW_003315963.1' => '19_GL383574V1_ALT',
	'NW_003315964.2' => '19_GL383575V2_ALT',
	'NW_003315965.1' => '19_GL383576V1_ALT',
	'NW_003571054.1' => '19_GL949746V1_ALT',
	'NW_003571055.2' => '19_GL949747V2_ALT',
	'NW_003571056.2' => '19_GL949748V2_ALT',
	'NW_003571057.2' => '19_GL949749V2_ALT',
	'NW_003571058.2' => '19_GL949750V2_ALT',
	'NW_003571059.2' => '19_GL949751V2_ALT',
	'NW_003571060.1' => '19_GL949752V1_ALT',
	'NW_003571061.2' => '19_GL949753V2_ALT',
	'NT_187621.1' => '19_KI270865V1_ALT',
	'NT_187619.1' => '19_KI270866V1_ALT',
	'NT_187620.1' => '19_KI270867V1_ALT',
	'NT_187622.1' => '19_KI270868V1_ALT',
	'NT_187636.1' => '19_KI270882V1_ALT',
	'NT_187637.1' => '19_KI270883V1_ALT',
	'NT_187638.1' => '19_KI270884V1_ALT',
	'NT_187639.1' => '19_KI270885V1_ALT',
	'NT_187640.1' => '19_KI270886V1_ALT',
	'NT_187641.1' => '19_KI270887V1_ALT',
	'NT_187642.1' => '19_KI270888V1_ALT',
	'NT_187643.1' => '19_KI270889V1_ALT',
	'NT_187644.1' => '19_KI270890V1_ALT',
	'NT_187645.1' => '19_KI270891V1_ALT',
	'NT_187668.1' => '19_KI270914V1_ALT',
	'NT_187669.1' => '19_KI270915V1_ALT',
	'NT_187670.1' => '19_KI270916V1_ALT',
	'NT_187671.1' => '19_KI270917V1_ALT',
	'NT_187672.1' => '19_KI270918V1_ALT',
	'NT_187673.1' => '19_KI270919V1_ALT',
	'NT_187674.1' => '19_KI270920V1_ALT',
	'NT_187675.1' => '19_KI270921V1_ALT',
	'NT_187676.1' => '19_KI270922V1_ALT',
	'NT_187677.1' => '19_KI270923V1_ALT',
	'NT_187683.1' => '19_KI270929V1_ALT',
	'NT_187684.1' => '19_KI270930V1_ALT',
	'NT_187685.1' => '19_KI270931V1_ALT',
	'NT_187686.1' => '19_KI270932V1_ALT',
	'NT_187687.1' => '19_KI270933V1_ALT',
	'NT_187693.1' => '19_KI270938V1_ALT',
	'NW_003315905.1' => '1_GL383518V1_ALT',
	'NW_003315906.1' => '1_GL383519V1_ALT',
	'NW_003315907.2' => '1_GL383520V2_ALT',
	'NT_187361.1' => '1_KI270706V1_RANDOM',
	'NT_187362.1' => '1_KI270707V1_RANDOM',
	'NT_187363.1' => '1_KI270708V1_RANDOM',
	'NT_187364.1' => '1_KI270709V1_RANDOM',
	'NT_187365.1' => '1_KI270710V1_RANDOM',
	'NT_187366.1' => '1_KI270711V1_RANDOM',
	'NT_187367.1' => '1_KI270712V1_RANDOM',
	'NT_187368.1' => '1_KI270713V1_RANDOM',
	'NT_187369.1' => '1_KI270714V1_RANDOM',
	'NT_187516.1' => '1_KI270759V1_ALT',
	'NT_187514.1' => '1_KI270760V1_ALT',
	'NT_187518.1' => '1_KI270761V1_ALT',
	'NT_187515.1' => '1_KI270762V1_ALT',
	'NT_187519.1' => '1_KI270763V1_ALT',
	'NT_187521.1' => '1_KI270764V1_ALT',
	'NT_187520.1' => '1_KI270765V1_ALT',
	'NT_187517.1' => '1_KI270766V1_ALT',
	'NT_187646.1' => '1_KI270892V1_ALT',
	'NW_003315966.2' => '20_GL383577V2_ALT',
	'NT_187623.1' => '20_KI270869V1_ALT',
	'NT_187624.1' => '20_KI270870V1_ALT',
	'NT_187625.1' => '20_KI270871V1_ALT',
	'NW_003315967.2' => '21_GL383578V2_ALT',
	'NW_003315968.2' => '21_GL383579V2_ALT',
	'NW_003315969.2' => '21_GL383580V2_ALT',
	'NW_003315970.2' => '21_GL383581V2_ALT',
	'NT_187626.1' => '21_KI270872V1_ALT',
	'NT_187627.1' => '21_KI270873V1_ALT',
	'NT_187628.1' => '21_KI270874V1_ALT',
	'NW_003315971.2' => '22_GL383582V2_ALT',
	'NW_003315972.2' => '22_GL383583V2_ALT',
	'NW_004504305.1' => '22_KB663609V1_ALT',
	'NT_187386.1' => '22_KI270731V1_RANDOM',
	'NT_187387.1' => '22_KI270732V1_RANDOM',
	'NT_187388.1' => '22_KI270733V1_RANDOM',
	'NT_187389.1' => '22_KI270734V1_RANDOM',
	'NT_187390.1' => '22_KI270735V1_RANDOM',
	'NT_187391.1' => '22_KI270736V1_RANDOM',
	'NT_187392.1' => '22_KI270737V1_RANDOM',
	'NT_187393.1' => '22_KI270738V1_RANDOM',
	'NT_187394.1' => '22_KI270739V1_RANDOM',
	'NT_187629.1' => '22_KI270875V1_ALT',
	'NT_187630.1' => '22_KI270876V1_ALT',
	'NT_187631.1' => '22_KI270877V1_ALT',
	'NT_187632.1' => '22_KI270878V1_ALT',
	'NT_187633.1' => '22_KI270879V1_ALT',
	'NT_187682.1' => '22_KI270928V1_ALT',
	'NW_003315908.1' => '2_GL383521V1_ALT',
	'NW_003315909.1' => '2_GL383522V1_ALT',
	'NW_003571033.2' => '2_GL582966V2_ALT',
	'NT_187370.1' => '2_KI270715V1_RANDOM',
	'NT_187371.1' => '2_KI270716V1_RANDOM',
	'NT_187523.1' => '2_KI270767V1_ALT',
	'NT_187528.1' => '2_KI270768V1_ALT',
	'NT_187522.1' => '2_KI270769V1_ALT',
	'NT_187525.1' => '2_KI270770V1_ALT',
	'NT_187530.1' => '2_KI270771V1_ALT',
	'NT_187524.1' => '2_KI270772V1_ALT',
	'NT_187526.1' => '2_KI270773V1_ALT',
	'NT_187529.1' => '2_KI270774V1_ALT',
	'NT_187531.1' => '2_KI270775V1_ALT',
	'NT_187527.1' => '2_KI270776V1_ALT',
	'NT_187647.1' => '2_KI270893V1_ALT',
	'NT_187648.1' => '2_KI270894V1_ALT',
	'NT_167215.1' => '3_GL000221V1_RANDOM',
	'NW_003315913.1' => '3_GL383526V1_ALT',
	'NW_003871060.2' => '3_JH636055V2_ALT',
	'NT_187533.1' => '3_KI270777V1_ALT',
	'NT_187536.1' => '3_KI270778V1_ALT',
	'NT_187532.1' => '3_KI270779V1_ALT',
	'NT_187537.1' => '3_KI270780V1_ALT',
	'NT_187538.1' => '3_KI270781V1_ALT',
	'NT_187534.1' => '3_KI270782V1_ALT',
	'NT_187535.1' => '3_KI270783V1_ALT',
	'NT_187539.1' => '3_KI270784V1_ALT',
	'NT_187649.1' => '3_KI270895V1_ALT',
	'NT_187678.1' => '3_KI270924V1_ALT',
	'NT_187688.1' => '3_KI270934V1_ALT',
	'NT_187689.1' => '3_KI270935V1_ALT',
	'NT_187690.1' => '3_KI270936V1_ALT',
	'NT_187691.1' => '3_KI270937V1_ALT',
	'NT_113793.3' => '4_GL000008V2_RANDOM',
	'NT_167250.2' => '4_GL000257V2_ALT',
	'NW_003315914.1' => '4_GL383527V1_ALT',
	'NW_003315915.1' => '4_GL383528V1_ALT',
	'NT_187542.1' => '4_KI270785V1_ALT',
	'NT_187543.1' => '4_KI270786V1_ALT',
	'NT_187541.1' => '4_KI270787V1_ALT',
	'NT_187544.1' => '4_KI270788V1_ALT',
	'NT_187545.1' => '4_KI270789V1_ALT',
	'NT_187540.1' => '4_KI270790V1_ALT',
	'NT_187650.1' => '4_KI270896V1_ALT',
	'NT_187679.1' => '4_KI270925V1_ALT',
	'NT_113948.1' => '5_GL000208V1_RANDOM',
	'NW_003315917.2' => '5_GL339449V2_ALT',
	'NW_003315918.1' => '5_GL383530V1_ALT',
	'NW_003315919.1' => '5_GL383531V1_ALT',
	'NW_003315920.1' => '5_GL383532V1_ALT',
	'NW_003571036.1' => '5_GL949742V1_ALT',
	'NT_187547.1' => '5_KI270791V1_ALT',
	'NT_187548.1' => '5_KI270792V1_ALT',
	'NT_187550.1' => '5_KI270793V1_ALT',
	'NT_187551.1' => '5_KI270794V1_ALT',
	'NT_187546.1' => '5_KI270795V1_ALT',
	'NT_187549.1' => '5_KI270796V1_ALT',
	'NT_187651.1' => '5_KI270897V1_ALT',
	'NT_187652.1' => '5_KI270898V1_ALT',
	'NT_167244.2' => '6_GL000250V2_ALT',
	'NT_113891.3' => '6_GL000251V2_ALT',
	'NT_167245.2' => '6_GL000252V2_ALT',
	'NT_167246.2' => '6_GL000253V2_ALT',
	'NT_167247.2' => '6_GL000254V2_ALT',
	'NT_167248.2' => '6_GL000255V2_ALT',
	'NT_167249.2' => '6_GL000256V2_ALT',
	'NW_003315921.1' => '6_GL383533V1_ALT',
	'NW_004166862.2' => '6_KB021644V2_ALT',
	'NT_187692.1' => '6_KI270758V1_ALT',
	'NT_187552.1' => '6_KI270797V1_ALT',
	'NT_187553.1' => '6_KI270798V1_ALT',
	'NT_187554.1' => '6_KI270799V1_ALT',
	'NT_187555.1' => '6_KI270800V1_ALT',
	'NT_187556.1' => '6_KI270801V1_ALT',
	'NT_187557.1' => '6_KI270802V1_ALT',
	'NW_003315922.2' => '7_GL383534V2_ALT',
	'NT_187562.1' => '7_KI270803V1_ALT',
	'NT_187558.1' => '7_KI270804V1_ALT',
	'NT_187560.1' => '7_KI270805V1_ALT',
	'NT_187559.1' => '7_KI270806V1_ALT',
	'NT_187563.1' => '7_KI270807V1_ALT',
	'NT_187564.1' => '7_KI270808V1_ALT',
	'NT_187561.1' => '7_KI270809V1_ALT',
	'NT_187653.1' => '7_KI270899V1_ALT',
	'NT_187567.1' => '8_KI270810V1_ALT',
	'NT_187565.1' => '8_KI270811V1_ALT',
	'NT_187568.1' => '8_KI270812V1_ALT',
	'NT_187570.1' => '8_KI270813V1_ALT',
	'NT_187566.1' => '8_KI270814V1_ALT',
	'NT_187569.1' => '8_KI270815V1_ALT',
	'NT_187571.1' => '8_KI270816V1_ALT',
	'NT_187573.1' => '8_KI270817V1_ALT',
	'NT_187572.1' => '8_KI270818V1_ALT',
	'NT_187574.1' => '8_KI270819V1_ALT',
	'NT_187575.1' => '8_KI270820V1_ALT',
	'NT_187576.1' => '8_KI270821V1_ALT',
	'NT_187577.1' => '8_KI270822V1_ALT',
	'NT_187654.1' => '8_KI270900V1_ALT',
	'NT_187655.1' => '8_KI270901V1_ALT',
	'NT_187680.1' => '8_KI270926V1_ALT',
	'NW_003315928.1' => '9_GL383539V1_ALT',
	'NW_003315929.1' => '9_GL383540V1_ALT',
	'NW_003315930.1' => '9_GL383541V1_ALT',
	'NW_003315931.1' => '9_GL383542V1_ALT',
	'NT_187372.1' => '9_KI270717V1_RANDOM',
	'NT_187373.1' => '9_KI270718V1_RANDOM',
	'NT_187374.1' => '9_KI270719V1_RANDOM',
	'NT_187375.1' => '9_KI270720V1_RANDOM',
	'NT_187578.1' => '9_KI270823V1_ALT',
	'NT_113901.1' => 'UN_GL000195V1',
	'NT_167208.1' => 'UN_GL000213V1',
	'NT_167209.1' => 'UN_GL000214V1',
	'NT_167211.2' => 'UN_GL000216V2',
	'NT_113889.1' => 'UN_GL000218V1',
	'NT_167213.1' => 'UN_GL000219V1',
	'NT_167214.1' => 'UN_GL000220V1',
	'NT_167218.1' => 'UN_GL000224V1',
	'NT_167220.1' => 'UN_GL000226V1',
	'NT_187396.1' => 'UN_KI270302V1',
	'NT_187398.1' => 'UN_KI270303V1',
	'NT_187397.1' => 'UN_KI270304V1',
	'NT_187399.1' => 'UN_KI270305V1',
	'NT_187402.1' => 'UN_KI270310V1',
	'NT_187406.1' => 'UN_KI270311V1',
	'NT_187405.1' => 'UN_KI270312V1',
	'NT_187404.1' => 'UN_KI270315V1',
	'NT_187403.1' => 'UN_KI270316V1',
	'NT_187407.1' => 'UN_KI270317V1',
	'NT_187401.1' => 'UN_KI270320V1',
	'NT_187400.1' => 'UN_KI270322V1',
	'NT_187459.1' => 'UN_KI270329V1',
	'NT_187458.1' => 'UN_KI270330V1',
	'NT_187461.1' => 'UN_KI270333V1',
	'NT_187460.1' => 'UN_KI270334V1',
	'NT_187462.1' => 'UN_KI270335V1',
	'NT_187465.1' => 'UN_KI270336V1',
	'NT_187466.1' => 'UN_KI270337V1',
	'NT_187463.1' => 'UN_KI270338V1',
	'NT_187464.1' => 'UN_KI270340V1',
	'NT_187469.1' => 'UN_KI270362V1',
	'NT_187467.1' => 'UN_KI270363V1',
	'NT_187468.1' => 'UN_KI270364V1',
	'NT_187470.1' => 'UN_KI270366V1',
	'NT_187494.1' => 'UN_KI270371V1',
	'NT_187491.1' => 'UN_KI270372V1',
	'NT_187492.1' => 'UN_KI270373V1',
	'NT_187490.1' => 'UN_KI270374V1',
	'NT_187493.1' => 'UN_KI270375V1',
	'NT_187489.1' => 'UN_KI270376V1',
	'NT_187471.1' => 'UN_KI270378V1',
	'NT_187472.1' => 'UN_KI270379V1',
	'NT_187486.1' => 'UN_KI270381V1',
	'NT_187488.1' => 'UN_KI270382V1',
	'NT_187482.1' => 'UN_KI270383V1',
	'NT_187484.1' => 'UN_KI270384V1',
	'NT_187487.1' => 'UN_KI270385V1',
	'NT_187480.1' => 'UN_KI270386V1',
	'NT_187475.1' => 'UN_KI270387V1',
	'NT_187478.1' => 'UN_KI270388V1',
	'NT_187473.1' => 'UN_KI270389V1',
	'NT_187474.1' => 'UN_KI270390V1',
	'NT_187481.1' => 'UN_KI270391V1',
	'NT_187485.1' => 'UN_KI270392V1',
	'NT_187483.1' => 'UN_KI270393V1',
	'NT_187479.1' => 'UN_KI270394V1',
	'NT_187476.1' => 'UN_KI270395V1',
	'NT_187477.1' => 'UN_KI270396V1',
	'NT_187409.1' => 'UN_KI270411V1',
	'NT_187408.1' => 'UN_KI270412V1',
	'NT_187410.1' => 'UN_KI270414V1',
	'NT_187415.1' => 'UN_KI270417V1',
	'NT_187412.1' => 'UN_KI270418V1',
	'NT_187411.1' => 'UN_KI270419V1',
	'NT_187413.1' => 'UN_KI270420V1',
	'NT_187416.1' => 'UN_KI270422V1',
	'NT_187417.1' => 'UN_KI270423V1',
	'NT_187414.1' => 'UN_KI270424V1',
	'NT_187418.1' => 'UN_KI270425V1',
	'NT_187419.1' => 'UN_KI270429V1',
	'NT_187424.1' => 'UN_KI270435V1',
	'NT_187425.1' => 'UN_KI270438V1',
	'NT_187420.1' => 'UN_KI270442V1',
	'NT_187495.1' => 'UN_KI270448V1',
	'NT_187422.1' => 'UN_KI270465V1',
	'NT_187421.1' => 'UN_KI270466V1',
	'NT_187423.1' => 'UN_KI270467V1',
	'NT_187426.1' => 'UN_KI270468V1',
	'NT_187437.1' => 'UN_KI270507V1',
	'NT_187430.1' => 'UN_KI270508V1',
	'NT_187428.1' => 'UN_KI270509V1',
	'NT_187427.1' => 'UN_KI270510V1',
	'NT_187435.1' => 'UN_KI270511V1',
	'NT_187432.1' => 'UN_KI270512V1',
	'NT_187436.1' => 'UN_KI270515V1',
	'NT_187431.1' => 'UN_KI270516V1',
	'NT_187438.1' => 'UN_KI270517V1',
	'NT_187429.1' => 'UN_KI270518V1',
	'NT_187433.1' => 'UN_KI270519V1',
	'NT_187496.1' => 'UN_KI270521V1',
	'NT_187434.1' => 'UN_KI270522V1',
	'NT_187440.1' => 'UN_KI270528V1',
	'NT_187439.1' => 'UN_KI270529V1',
	'NT_187441.1' => 'UN_KI270530V1',
	'NT_187443.1' => 'UN_KI270538V1',
	'NT_187442.1' => 'UN_KI270539V1',
	'NT_187444.1' => 'UN_KI270544V1',
	'NT_187445.1' => 'UN_KI270548V1',
	'NT_187450.1' => 'UN_KI270579V1',
	'NT_187448.1' => 'UN_KI270580V1',
	'NT_187449.1' => 'UN_KI270581V1',
	'NT_187454.1' => 'UN_KI270582V1',
	'NT_187446.1' => 'UN_KI270583V1',
	'NT_187453.1' => 'UN_KI270584V1',
	'NT_187447.1' => 'UN_KI270587V1',
	'NT_187455.1' => 'UN_KI270588V1',
	'NT_187451.1' => 'UN_KI270589V1',
	'NT_187452.1' => 'UN_KI270590V1',
	'NT_187457.1' => 'UN_KI270591V1',
	'NT_187456.1' => 'UN_KI270593V1',
	'NT_187497.1' => 'UN_KI270741V1',
	'NT_187513.1' => 'UN_KI270742V1',
	'NT_187498.1' => 'UN_KI270743V1',
	'NT_187499.1' => 'UN_KI270744V1',
	'NT_187500.1' => 'UN_KI270745V1',
	'NT_187501.1' => 'UN_KI270746V1',
	'NT_187502.1' => 'UN_KI270747V1',
	'NT_187503.1' => 'UN_KI270748V1',
	'NT_187504.1' => 'UN_KI270749V1',
	'NT_187505.1' => 'UN_KI270750V1',
	'NT_187506.1' => 'UN_KI270751V1',
	'NT_187507.1' => 'UN_KI270752V1',
	'NT_187508.1' => 'UN_KI270753V1',
	'NT_187509.1' => 'UN_KI270754V1',
	'NT_187510.1' => 'UN_KI270755V1',
	'NT_187511.1' => 'UN_KI270756V1',
	'NT_187512.1' => 'UN_KI270757V1',
	'NT_187634.1' => 'X_KI270880V1_ALT',
	'NT_187635.1' => 'X_KI270881V1_ALT',
	'NT_187667.1' => 'X_KI270913V1_ALT',
	'NT_187395.1' => 'Y_KI270740V1_RANDOM',
	'NW_012132914.1' => 'HG1342_HG2282_PATCH',
	'NW_015495298.1' => 'HSCHR1_5_CTG3',
	'NW_011332688.1' => 'HG2095_PATCH',
	'NW_014040926.1' => 'HSCHR1_4_CTG3',
	'NW_009646195.1' => 'HG2058_PATCH',
	'NW_018654706.1' => 'HSCHR1_8_CTG3',
	'NW_019805487.1' => 'HG460_PATCH',
	'NW_009646194.1' => 'HG986_PATCH',
	'NW_018654707.1' => 'HSCHR1_9_CTG3',
	'NW_014040925.1' => 'HSCHR1_3_CTG3',
	'NW_017852928.1' => 'HSCHR1_6_CTG3',
	'NW_009646196.1' => 'HG2104_PATCH',
	'NW_011332687.1' => 'HG1832_PATCH',
	'NW_018654708.1' => 'HG2002_PATCH',
	'NW_014040927.1' => 'HSCHR1_5_CTG32_1',
	'NW_012132915.1' => 'HG2290_PATCH',
	'NW_018654709.1' => 'HSCHR2_7_CTG7_2',
	'NW_015495299.1' => 'HSCHR2_6_CTG7_2',
	'NW_018654710.1' => 'HSCHR2_8_CTG7_2',
	'NW_011332690.1' => 'HG2232_PATCH',
	'NW_011332689.1' => 'HG2233_PATCH',
	'NW_017363813.1' => 'HG2236_PATCH',
	'NW_009646197.1' => 'HG2066_PATCH',
	'NW_012132916.1' => 'HG2235_PATCH',
	'NW_011332691.1' => 'HG126_PATCH',
	'NW_018654711.1' => 'HSCHR3_4_CTG1',
	'NW_012132917.1' => 'HG2237_PATCH',
	'NW_009646198.1' => 'HG2022_PATCH',
	'NW_019805491.1' => 'HG2133_PATCH',
	'NW_019805492.1' => 'HSCHR3_6_CTG2_1',
	'NW_019805490.1' => 'HSCHR3_9_CTG2_1',
	'NW_019805489.1' => 'HSCHR3_8_CTG2_1',
	'NW_019805488.1' => 'HSCHR3_7_CTG2_1',
	'NW_013171799.1' => 'HSCHR4_2_CTG4',
	'NW_013171800.1' => 'HSCHR4_8_CTG12',
	'NW_013171801.1' => 'HSCHR4_9_CTG12',
	'NW_017363814.1' => 'HSCHR4_12_CTG12',
	'NW_015495300.1' => 'HG2023_PATCH',
	'NW_015495301.1' => 'HSCHR4_11_CTG12',
	'NW_018654712.1' => 'HSCHR5_9_CTG1',
	'NW_009646199.1' => 'HSCHR5_7_CTG1',
	'NW_016107297.1' => 'HSCHR5_8_CTG1',
	'NW_016107298.1' => 'HG30_PATCH',
	'NW_018654713.1' => 'HG2057_PATCH',
	'NW_013171803.1' => 'HSCHR6_1_CTG10',
	'NW_012132918.1' => 'HG1651_PATCH',
	'NW_009646200.1' => 'HG2128_PATCH',
	'NW_013171802.1' => 'HG2072_PATCH',
	'NW_017363815.1' => 'HG2121_PATCH',
	'NW_019805493.1' => 'HSCHR7_3_CTG1',
	'NW_017852929.1' => 'HG2088_PATCH',
	'NW_017852930.1' => 'HG2266_PATCH',
	'NW_018654714.1' => 'HG708_PATCH',
	'NW_018654715.1' => 'HSCHR7_3_CTG4_4',
	'NW_012132919.1' => 'HG2239_PATCH',
	'NW_018654717.1' => 'HG76_PATCH',
	'NW_017852932.1' => 'HG2068_PATCH',
	'NW_017852931.1' => 'HG2067_PATCH',
	'NW_019805494.1' => 'HSCHR8_7_CTG7',
	'NW_018654716.1' => 'HG2419_PATCH',
	'NW_013171804.1' => 'HSCHR9_1_CTG6',
	'NW_013171805.1' => 'HSCHR9_1_CTG7',
	'NW_009646201.1' => 'HG2030_PATCH',
	'NW_011332694.1' => 'HG2244_HG2245_PATCH',
	'NW_013171806.1' => 'HSCHR10_1_CTG6',
	'NW_009646202.1' => 'HG2191_PATCH',
	'NW_013171807.1' => 'HG2334_PATCH',
	'NW_011332693.1' => 'HG2242_HG2243_PATCH',
	'NW_011332692.1' => 'HG2241_PATCH',
	'NW_015148966.1' => 'HG107_PATCH',
	'NW_011332695.1' => 'HSCHR11_1_CTG1_2',
	'NW_019805496.1' => 'HG2114_PATCH',
	'NW_019805495.1' => 'HG2060_PATCH',
	'NW_017363816.1' => 'HG1708_PATCH',
	'NW_019805498.1' => 'HSCHR11_1_CTG3_1',
	'NW_019805497.1' => 'HSCHR11_2_CTG8',
	'NW_013171808.1' => 'HG2116_PATCH',
	'NW_009646203.1' => 'HG2217_PATCH',
	'NW_013171809.1' => 'HSCHR12_2_CTG1',
	'NW_018654718.1' => 'HG1815_PATCH',
	'NW_011332696.1' => 'HG1362_PATCH',
	'NW_009646204.1' => 'HG23_PATCH',
	'NW_018654720.1' => 'HSCHR12_8_CTG2_1',
	'NW_015148967.1' => 'HG2063_PATCH',
	'NW_018654719.1' => 'HG2047_PATCH',
	'NW_011332697.1' => 'HG2247_PATCH',
	'NW_019805499.1' => 'HSCHR12_9_CTG2_1',
	'NW_011332699.1' => 'HG2291_PATCH',
	'NW_013171810.1' => 'HSCHR13_1_CTG7',
	'NW_009646205.1' => 'HG2216_PATCH',
	'NW_011332700.1' => 'HG2249_PATCH',
	'NW_013171811.1' => 'HSCHR13_1_CTG8',
	'NW_011332698.1' => 'HG2288_HG2289_PATCH',
	'NW_018654722.1' => 'HG1_PATCH',
	'NW_018654721.1' => 'HSCHR14_8_CTG1',
	'NW_011332701.1' => 'HG2139_PATCH',
	'NW_012132920.1' => 'HSCHR15_6_CTG8',
	'NW_013171812.1' => 'HSCHR16_5_CTG1',
	'NW_019805500.1' => 'HG2263_PATCH',
	'NW_017852933.1' => 'HG926_PATCH',
	'NW_013171813.1' => 'HSCHR16_4_CTG3_1',
	'NW_018654723.1' => 'HSCHR16_5_CTG3_1',
	'NW_012132921.1' => 'HSCHR16_3_CTG3_1',
	'NW_017363817.1' => 'HG2285_HG106_HG2252_PATCH',
	'NW_016107299.1' => 'HG2046_PATCH',
	'NW_017363819.1' => 'HSCHR17_3_CTG1',
	'NW_017363818.1' => 'HSCHR17_11_CTG4',
	'NW_019805501.1' => 'HSCHR17_12_CTG4',
	'NW_019805503.1' => 'HSCHR18_1_CTG1',
	'NW_014040928.1' => 'HSCHR18_5_CTG1_1',
	'NW_019805502.1' => 'HG2412_PATCH',
	'NW_013171814.1' => 'HG2213_PATCH',
	'NW_018654724.1' => 'HG2442_PATCH',
	'NW_014040929.1' => 'HG26_PATCH',
	'NW_009646206.1' => 'HG2021_PATCH',
	'NW_016107300.1' => 'HSCHR19KIR_0019-4656-A_CTG3_1',
	'NW_016107301.1' => 'HSCHR19KIR_CA01-TA01_1_CTG3_1',
	'NW_016107302.1' => 'HSCHR19KIR_CA01-TA01_2_CTG3_1',
	'NW_016107303.1' => 'HSCHR19KIR_CA01-TB04_CTG3_1',
	'NW_016107304.1' => 'HSCHR19KIR_CA01-TB01_CTG3_1',
	'NW_016107305.1' => 'HSCHR19KIR_HG2394_CTG3_1',
	'NW_016107306.1' => 'HSCHR19KIR_502960008-2_CTG3_1',
	'NW_016107307.1' => 'HSCHR19KIR_502960008-1_CTG3_1',
	'NW_016107308.1' => 'HSCHR19KIR_0010-5217-AB_CTG3_1',
	'NW_016107309.1' => 'HSCHR19KIR_7191059-1_CTG3_1',
	'NW_016107310.1' => 'HSCHR19KIR_0019-4656-B_CTG3_1',
	'NW_016107311.1' => 'HSCHR19KIR_CA04_CTG3_1',
	'NW_016107313.1' => 'HSCHR19KIR_7191059-2_CTG3_1',
	'NW_016107314.1' => 'HSCHR19KIR_HG2396_CTG3_1',
	'NW_016107312.1' => 'HSCHR19KIR_HG2393_CTG3_1',
	'NW_009646207.1' => 'HSCHR22_4_CTG1',
	'NW_014040930.1' => 'HSCHR22_6_CTG1',
	'NW_014040931.1' => 'HSCHR22_7_CTG1',
	'NW_009646208.1' => 'HSCHR22_5_CTG1',
	'NW_015148968.1' => 'HSCHR22_8_CTG1',
	'NW_015148969.1' => 'HG1311_PATCH',
	'NW_017363820.1' => 'HSCHRX_3_CTG7',
	'NW_018654725.1' => 'HG1531_PATCH',
	'NW_018654726.1' => 'HG1535_PATCH',
	'NW_009646209.1' => 'HG2062_PATCH',
	'NT_187515.1' => 'HSCHR1_1_CTG3',
	'NT_187517.1' => 'HSCHR1_2_CTG3',
	'NT_187514.1' => 'HSCHR1_1_CTG11',
	'NT_187520.1' => 'HSCHR1_4_CTG31',
	'NW_003315905.1' => 'HSCHR1_1_CTG31',
	'NW_003315906.1' => 'HSCHR1_2_CTG31',
	'NW_003315907.2' => 'HSCHR1_3_CTG31',
	'NT_187521.1' => 'HSCHR1_4_CTG32_1',
	'NT_187519.1' => 'HSCHR1_3_CTG32_1',
	'NT_187516.1' => 'HSCHR1_1_CTG32_1',
	'NT_187518.1' => 'HSCHR1_2_CTG32_1',
	'NT_187525.1' => 'HSCHR2_2_CTG1',
	'NT_187526.1' => 'HSCHR2_3_CTG1',
	'NT_187529.1' => 'HSCHR2_4_CTG1',
	'NT_187522.1' => 'HSCHR2_1_CTG1',
	'NW_003315908.1' => 'HSCHR2_1_CTG5',
	'NT_187524.1' => 'HSCHR2_1_CTG7',
	'NT_187531.1' => 'HSCHR2_5_CTG7_2',
	'NT_187530.1' => 'HSCHR2_4_CTG7_2',
	'NT_187528.1' => 'HSCHR2_3_CTG7_2',
	'NW_003571033.2' => 'HSCHR2_2_CTG7_2',
	'NW_003315909.1' => 'HSCHR2_1_CTG7_2',
	'NT_187527.1' => 'HSCHR2_3_CTG15',
	'NT_187523.1' => 'HSCHR2_1_CTG15',
	'NW_003871060.2' => 'HSCHR3_1_CTG1',
	'NT_187535.1' => 'HSCHR3_3_CTG1',
	'NT_187537.1' => 'HSCHR3_4_CTG2_1',
	'NW_003315913.1' => 'HSCHR3_1_CTG2_1',
	'NT_187533.1' => 'HSCHR3_2_CTG2_1',
	'NT_187536.1' => 'HSCHR3_3_CTG2_1',
	'NT_187538.1' => 'HSCHR3_5_CTG2_1',
	'NT_187532.1' => 'HSCHR3_1_CTG3',
	'NT_187534.1' => 'HSCHR3_2_CTG3',
	'NT_187539.1' => 'HSCHR3_9_CTG3',
	'NT_187540.1' => 'HSCHR4_1_CTG4',
	'NW_003315915.1' => 'HSCHR4_1_CTG6',
	'NT_187541.1' => 'HSCHR4_1_CTG8_1',
	'NT_167250.2' => 'HSCHR4_1_CTG9',
	'NT_187544.1' => 'HSCHR4_4_CTG12',
	'NW_003315914.1' => 'HSCHR4_1_CTG12',
	'NT_187542.1' => 'HSCHR4_2_CTG12',
	'NT_187545.1' => 'HSCHR4_5_CTG12',
	'NT_187543.1' => 'HSCHR4_3_CTG12',
	'NT_187550.1' => 'HSCHR5_5_CTG1',
	'NT_187548.1' => 'HSCHR5_4_CTG1',
	'NT_187547.1' => 'HSCHR5_3_CTG1',
	'NW_003315920.1' => 'HSCHR5_1_CTG1',
	'NW_003571036.1' => 'HSCHR5_2_CTG1',
	'NT_187551.1' => 'HSCHR5_6_CTG1',
	'NW_003315917.2' => 'HSCHR5_2_CTG1_1',
	'NW_003315918.1' => 'HSCHR5_3_CTG1_1',
	'NT_187549.1' => 'HSCHR5_4_CTG1_1',
	'NW_003315919.1' => 'HSCHR5_1_CTG5',
	'NT_187546.1' => 'HSCHR5_2_CTG5',
	'NT_167244.2' => 'HSCHR6_MHC_APD_CTG1',
	'NT_187555.1' => 'HSCHR6_1_CTG7',
	'NT_187554.1' => 'HSCHR6_1_CTG6',
	'NW_003315921.1' => 'HSCHR6_1_CTG2',
	'NT_187556.1' => 'HSCHR6_1_CTG8',
	'NT_187557.1' => 'HSCHR6_1_CTG9',
	'NW_004166862.2' => 'HSCHR6_1_CTG3',
	'NT_187552.1' => 'HSCHR6_1_CTG4',
	'NT_187553.1' => 'HSCHR6_1_CTG5',
	'NT_187558.1' => 'HSCHR7_1_CTG1',
	'NT_187561.1' => 'HSCHR7_2_CTG4_4',
	'NT_187559.1' => 'HSCHR7_1_CTG4_4',
	'NW_003315922.2' => 'HSCHR7_1_CTG6',
	'NT_187562.1' => 'HSCHR7_2_CTG6',
	'NT_187564.1' => 'HSCHR7_3_CTG6',
	'NT_187563.1' => 'HSCHR7_2_CTG7',
	'NT_187560.1' => 'HSCHR7_1_CTG7',
	'NT_187572.1' => 'HSCHR8_4_CTG1',
	'NT_187568.1' => 'HSCHR8_2_CTG1',
	'NT_187565.1' => 'HSCHR8_1_CTG1',
	'NT_187576.1' => 'HSCHR8_8_CTG1',
	'NT_187570.1' => 'HSCHR8_3_CTG1',
	'NT_187577.1' => 'HSCHR8_9_CTG1',
	'NT_187566.1' => 'HSCHR8_1_CTG6',
	'NT_187567.1' => 'HSCHR8_1_CTG7',
	'NT_187574.1' => 'HSCHR8_5_CTG7',
	'NT_187575.1' => 'HSCHR8_6_CTG7',
	'NT_187573.1' => 'HSCHR8_4_CTG7',
	'NT_187571.1' => 'HSCHR8_3_CTG7',
	'NT_187569.1' => 'HSCHR8_2_CTG7',
	'NW_003315928.1' => 'HSCHR9_1_CTG1',
	'NW_003315929.1' => 'HSCHR9_1_CTG2',
	'NW_003315930.1' => 'HSCHR9_1_CTG3',
	'NW_003315931.1' => 'HSCHR9_1_CTG4',
	'NT_187578.1' => 'HSCHR9_1_CTG5',
	'NW_003315934.1' => 'HSCHR10_1_CTG1',
	'NT_187579.1' => 'HSCHR10_1_CTG3',
	'NW_003315935.1' => 'HSCHR10_1_CTG2',
	'NT_187580.1' => 'HSCHR10_1_CTG4',
	'NT_187586.1' => 'HSCHR11_1_CTG8',
	'NT_187584.1' => 'HSCHR11_1_CTG6',
	'NT_187585.1' => 'HSCHR11_1_CTG7',
	'NT_187583.1' => 'HSCHR11_1_CTG5',
	'NW_003315936.1' => 'HSCHR11_1_CTG1_1',
	'NW_003871073.1' => 'HG142_HG150_NOVEL_TEST',
	'NW_003871074.1' => 'HG151_NOVEL_TEST',
	'NT_187582.1' => 'HSCHR11_1_CTG3',
	'NT_187581.1' => 'HSCHR11_1_CTG2',
	'NW_003571049.1' => 'HSCHR12_1_CTG1',
	'NW_003571050.1' => 'HSCHR12_2_CTG2',
	'NT_187588.1' => 'HSCHR12_5_CTG2',
	'NW_003315938.1' => 'HSCHR12_1_CTG2',
	'NT_187587.1' => 'HSCHR12_4_CTG2',
	'NW_003315939.2' => 'HSCHR12_1_CTG2_1',
	'NW_003315941.1' => 'HSCHR12_2_CTG2_1',
	'NW_003315942.2' => 'HSCHR12_3_CTG2_1',
	'NT_187590.1' => 'HSCHR12_6_CTG2_1',
	'NW_003315940.1' => 'HSCHR12_4_CTG2_1',
	'NT_187589.1' => 'HSCHR12_5_CTG2_1',
	'NT_187591.1' => 'HSCHR12_7_CTG2_1',
	'NT_187594.1' => 'HSCHR13_1_CTG3',
	'NT_187593.1' => 'HSCHR13_1_CTG2',
	'NT_187597.1' => 'HSCHR13_1_CTG6',
	'NT_187595.1' => 'HSCHR13_1_CTG4',
	'NT_187592.1' => 'HSCHR13_1_CTG1',
	'NT_187596.1' => 'HSCHR13_1_CTG5',
	'NT_187598.1' => 'HSCHR14_1_CTG1',
	'NT_187601.1' => 'HSCHR14_7_CTG1',
	'NT_187599.1' => 'HSCHR14_2_CTG1',
	'NT_187600.1' => 'HSCHR14_3_CTG1',
	'NT_187602.1' => 'HSCHR15_1_CTG1',
	'NT_187604.1' => 'HSCHR15_3_CTG3',
	'NT_187603.1' => 'HSCHR15_1_CTG3',
	'NW_003315943.1' => 'HSCHR15_1_CTG8',
	'NT_187605.1' => 'HSCHR15_3_CTG8',
	'NW_003315944.2' => 'HSCHR15_2_CTG8',
	'NT_187606.1' => 'HSCHR15_5_CTG8',
	'NT_187610.1' => 'HSCHR16_CTG2',
	'NT_187609.1' => 'HSCHR16_4_CTG1',
	'NT_187608.1' => 'HSCHR16_3_CTG1',
	'NT_187607.1' => 'HSCHR16_1_CTG1',
	'NW_003315945.1' => 'HSCHR16_1_CTG3_1',
	'NW_003315946.1' => 'HSCHR16_2_CTG3_1',
	'NW_003315952.3' => 'HSCHR17_1_CTG1',
	'NT_187613.1' => 'HSCHR17_2_CTG2',
	'NT_187611.1' => 'HSCHR17_1_CTG2',
	'NT_187614.1' => 'HSCHR17_7_CTG4',
	'NW_003871091.1' => 'HSCHR17_4_CTG4',
	'NW_003871092.1' => 'HSCHR17_5_CTG4',
	'NW_003315953.2' => 'HSCHR17_1_CTG4',
	'NT_167251.2' => 'HSCHR17_1_CTG5',
	'NW_003315954.1' => 'HSCHR17_2_CTG4',
	'NT_187615.1' => 'HSCHR17_8_CTG4',
	'NT_187616.1' => 'HSCHR17_9_CTG4',
	'NW_003315955.1' => 'HSCHR17_3_CTG4',
	'NT_187612.1' => 'HSCHR17_1_CTG9',
	'NT_187618.1' => 'HSCHR18_4_CTG1_1',
	'NW_003315956.1' => 'HSCHR18_1_CTG1_1',
	'NW_003315959.1' => 'HSCHR18_2_CTG1_1',
	'NW_003315960.1' => 'HSCHR18_2_CTG2',
	'NW_003315957.1' => 'HSCHR18_1_CTG2',
	'NW_003315958.1' => 'HSCHR18_1_CTG2_1',
	'NW_003315961.1' => 'HSCHR18_2_CTG2_1',
	'NT_187617.1' => 'HSCHR18_3_CTG2_1',
	'NT_187622.1' => 'HSCHR19_5_CTG2',
	'NT_187621.1' => 'HSCHR19_4_CTG2',
	'NW_003315962.1' => 'HSCHR19_1_CTG2',
	'NW_003315964.2' => 'HSCHR19_2_CTG2',
	'NW_003315965.1' => 'HSCHR19_3_CTG2',
	'NW_003315963.1' => 'HSCHR19_1_CTG3_1',
	'NT_187619.1' => 'HSCHR19_2_CTG3_1',
	'NT_187620.1' => 'HSCHR19_3_CTG3_1',
	'NW_003571054.1' => 'HSCHR19LRC_COX1_CTG3_1',
	'NW_003315966.2' => 'HSCHR20_1_CTG1',
	'NT_187623.1' => 'HSCHR20_1_CTG2',
	'NT_187625.1' => 'HSCHR20_1_CTG4',
	'NT_187624.1' => 'HSCHR20_1_CTG3',
	'NW_003315967.2' => 'HSCHR21_1_CTG1_1',
	'NT_187628.1' => 'HSCHR21_8_CTG1_1',
	'NT_187627.1' => 'HSCHR21_6_CTG1_1',
	'NW_003315968.2' => 'HSCHR21_2_CTG1_1',
	'NW_003315969.2' => 'HSCHR21_3_CTG1_1',
	'NW_003315970.2' => 'HSCHR21_4_CTG1_1',
	'NT_187626.1' => 'HSCHR21_5_CTG2',
	'NT_187629.1' => 'HSCHR22_1_CTG3',
	'NT_187632.1' => 'HSCHR22_1_CTG6',
	'NT_187633.1' => 'HSCHR22_1_CTG7',
	'NT_187630.1' => 'HSCHR22_1_CTG4',
	'NT_187631.1' => 'HSCHR22_1_CTG5',
	'NW_003315972.2' => 'HSCHR22_1_CTG2',
	'NW_003315971.2' => 'HSCHR22_1_CTG1',
	'NT_187634.1' => 'HSCHRX_1_CTG3',
	'NT_187635.1' => 'HSCHRX_2_CTG12',
	'NT_187646.1' => 'HSCHR1_ALT2_1_CTG32_1',
	'NT_187648.1' => 'HSCHR2_2_CTG7',
	'NT_187647.1' => 'HSCHR2_2_CTG15',
	'NT_187649.1' => 'HSCHR3_3_CTG3',
	'NT_187650.1' => 'HSCHR4_6_CTG12',
	'NT_187651.1' => 'HSCHR5_1_CTG1_1',
	'NT_187652.1' => 'HSCHR5_3_CTG5',
	'NT_113891.3' => 'HSCHR6_MHC_COX_CTG1',
	'NT_187653.1' => 'HSCHR7_2_CTG1',
	'NT_187655.1' => 'HSCHR8_6_CTG1',
	'NT_187654.1' => 'HSCHR8_5_CTG1',
	'NT_187656.1' => 'HSCHR11_2_CTG1',
	'NT_187657.1' => 'HSCHR11_2_CTG1_1',
	'NT_187658.1' => 'HSCHR12_3_CTG2',
	'NT_187659.1' => 'HSCHR15_2_CTG3',
	'NT_187660.1' => 'HSCHR15_4_CTG8',
	'NT_187662.1' => 'HSCHR17_2_CTG1',
	'NT_187664.1' => 'HSCHR17_3_CTG2',
	'NT_187661.1' => 'HSCHR17_10_CTG4',
	'NW_003871093.1' => 'HSCHR17_6_CTG4',
	'NT_187663.1' => 'HSCHR17_2_CTG5',
	'NT_187665.1' => 'HSCHR18_ALT21_CTG2_1',
	'NT_187666.1' => 'HSCHR18_ALT2_CTG2_1',
	'NW_003571055.2' => 'HSCHR19LRC_COX2_CTG3_1',
	'NW_004504305.1' => 'HSCHR22_2_CTG1',
	'NT_187667.1' => 'HSCHRX_2_CTG3',
	'NT_187678.1' => 'HSCHR3_4_CTG3',
	'NT_187679.1' => 'HSCHR4_7_CTG12',
	'NT_167245.2' => 'HSCHR6_MHC_DBB_CTG1',
	'NT_187680.1' => 'HSCHR8_7_CTG1',
	'NT_187681.1' => 'HSCHR11_3_CTG1',
	'NW_003571056.2' => 'HSCHR19LRC_LRC_I_CTG3_1',
	'NT_187682.1' => 'HSCHR22_3_CTG1',
	'NT_187688.1' => 'HSCHR3_5_CTG3',
	'NT_167246.2' => 'HSCHR6_MHC_MANN_CTG1',
	'NW_003571057.2' => 'HSCHR19LRC_LRC_J_CTG3_1',
	'NT_187689.1' => 'HSCHR3_6_CTG3',
	'NT_167247.2' => 'HSCHR6_MHC_MCF_CTG1',
	'NW_003571058.2' => 'HSCHR19LRC_LRC_S_CTG3_1',
	'NT_187690.1' => 'HSCHR3_7_CTG3',
	'NT_167248.2' => 'HSCHR6_MHC_QBL_CTG1',
	'NW_003571059.2' => 'HSCHR19LRC_LRC_T_CTG3_1',
	'NT_187691.1' => 'HSCHR3_8_CTG3',
	'NT_167249.2' => 'HSCHR6_MHC_SSTO_CTG1',
	'NW_003571060.1' => 'HSCHR19LRC_PGF1_CTG3_1',
	'NT_187692.1' => 'HSCHR6_8_CTG1',
	'NW_003571061.2' => 'HSCHR19LRC_PGF2_CTG3_1',
	'NT_187693.1' => 'HSCHR19_4_CTG3_1',
	'NT_187636.1' => 'HSCHR19KIR_FH15_B_HAP_CTG3_1',
	'NT_187637.1' => 'HSCHR19KIR_G085_A_HAP_CTG3_1',
	'NT_187638.1' => 'HSCHR19KIR_G085_BA1_HAP_CTG3_1',
	'NT_187639.1' => 'HSCHR19KIR_G248_A_HAP_CTG3_1',
	'NT_187640.1' => 'HSCHR19KIR_G248_BA2_HAP_CTG3_1',
	'NT_187641.1' => 'HSCHR19KIR_GRC212_AB_HAP_CTG3_1',
	'NT_187642.1' => 'HSCHR19KIR_GRC212_BA1_HAP_CTG3_1',
	'NT_187643.1' => 'HSCHR19KIR_LUCE_A_HAP_CTG3_1',
	'NT_187644.1' => 'HSCHR19KIR_LUCE_BDEL_HAP_CTG3_1',
	'NT_187645.1' => 'HSCHR19KIR_RSH_A_HAP_CTG3_1',
	'NT_187668.1' => 'HSCHR19KIR_RSH_BA2_HAP_CTG3_1',
	'NT_187669.1' => 'HSCHR19KIR_T7526_A_HAP_CTG3_1',
	'NT_187670.1' => 'HSCHR19KIR_T7526_BDEL_HAP_CTG3_1',
	'NT_187671.1' => 'HSCHR19KIR_ABC08_A1_HAP_CTG3_1',
	'NT_187672.1' => 'HSCHR19KIR_ABC08_AB_HAP_C_P_CTG3_1',
	'NT_187673.1' => 'HSCHR19KIR_ABC08_AB_HAP_T_P_CTG3_1',
	'NT_187674.1' => 'HSCHR19KIR_FH05_A_HAP_CTG3_1',
	'NT_187675.1' => 'HSCHR19KIR_FH05_B_HAP_CTG3_1',
	'NT_187676.1' => 'HSCHR19KIR_FH06_A_HAP_CTG3_1',
	'NT_187677.1' => 'HSCHR19KIR_FH06_BA1_HAP_CTG3_1',
	'NT_187683.1' => 'HSCHR19KIR_FH08_A_HAP_CTG3_1',
	'NT_187684.1' => 'HSCHR19KIR_FH08_BAX_HAP_CTG3_1',
	'NT_187685.1' => 'HSCHR19KIR_FH13_A_HAP_CTG3_1',
	'NT_187686.1' => 'HSCHR19KIR_FH13_BA2_HAP_CTG3_1',
	'NT_187687.1' => 'HSCHR19KIR_FH15_A_HAP_CTG3_1',
	'NT_113949.2' => 'HSCHR19KIR_RP5_B_HAP_CTG3_1'    
	);
# Reverse lookup, name -> accession, derived from %ac_to_name as it exists
# after assignment. Accessions that occur more than once in that literal keep
# only their last name, so the overridden names (e.g. '23', '24',
# '1_KI270762V1_ALT') get no entry here.
# NOTE(review): confirm that no caller looks up one of the overridden names.
my %name_to_ac;
for my $ac (keys %ac_to_name) {
    $name_to_ac{ $ac_to_name{$ac} } = $ac;
}


# Fixed method label; how it is consumed is not visible in this part of the
# file.
my $method = q{genebuild};

# Biotype filter. Left undefined, which means no filtering; the commented-out
# line shows the hashref form (biotype => 1) for restricting to a fixed set.
#my $accepted_biotypes = { map {$_ => 1} qw(protein_coding pseudogene miRNA snRNA snoRNA processed_transcript) };
my $accepted_biotypes;

############################################################################

Log::Log4perl->init_once( "$root/etc/logging.conf" );
my $logger = Log::Log4perl->get_logger();

GetOptions($opts,
	   'config|C=s',
	   'divisor|d=i',
	   'host|h=s',
	   'modulus|m=i',
	   'npartitions|n=i',
	   'port|p=s',
	   'prefix=s',
	   'user|u=s',
	   'primary-only+',
    )
    || die("$0: you got usage issues, homey\n");


my $eversion = software_version();
my $origin = "ensembl-$eversion";
my $prefix = $opts->{prefix} || "$origin-$opts->{port}";
mkdir($prefix);


my $registry = 'Bio::EnsEMBL::Registry';
$registry->load_registry_from_db(
    -host => $opts->{host},
    -user => $opts->{user},
    -port => $opts->{port},
    -pass => $opts->{pass},
    -species => 'homo sapiens',
    );

#$registry->version_check()
#    || die("Version check failed");

my $ga = $registry->get_adaptor( 'homo sapiens', 'core', 'gene' );
my $sa = $registry->get_adaptor( 'homo sapiens', 'core', 'slice' );
my $ta = $registry->get_adaptor( 'homo sapiens', 'core', 'transcript' );

$logger->info(sprintf("connected to %s @ %s:%s", 
		      $ga->dbc()->dbname(), $opts->{host}, $opts->{port}));


my @Genes;
if (@ARGV) {
    @Genes = map {fetch_Gene_by_name($ga,$_)} @ARGV;
} else {
    @Genes = @{$ga->fetch_all()};
}

my $n0 = $#Genes+1;
@Genes = grep {$_->external_db() eq 'HGNC'} @Genes;
my $n1 = $#Genes+1;
$logger->info(sprintf("%d Genes fetched, %d after filtering for HGNC", $n0, $n1));

process_genes($opts,@Genes);

exit(0);

############################################################################

# fetch_Gene_by_name($ga, $hgnc)
#
#   $ga   - gene adaptor providing fetch_all_by_external_name()
#   $hgnc - gene symbol to look up
#
# Returns the list of genes whose external_db is 'HGNC' and whose
# external_name is exactly $hgnc.  When nothing matches, an error is logged
# and an empty list is returned (undef in scalar context).
sub fetch_Gene_by_name($$) {
    my ($ga, $hgnc) = @_;
    my @candidates = @{ $ga->fetch_all_by_external_name($hgnc,'HGNC') };

    # Limit replies to only those in HGNC
    # For some reason, fetch_all_... doesn't honor the 'HGNC' arg above. Sigh.
    # Either accessor may return undef, so test definedness before eq to
    # avoid "uninitialized value" warnings.
    my @Genes = grep {
	my $db   = $_->external_db();
	my $name = $_->external_name();
	defined $db and $db eq 'HGNC' and defined $name and $name eq $hgnc;
    } @candidates;

    if (not @Genes) {
	$logger->error("gene $hgnc is not in Ensembl");
	return;
    }

    # Every surviving gene already has external_name eq $hgnc, so no
    # further name-mismatch check is needed.
    return @Genes;
}


# process_genes($opts, @genes)
#
#   $opts  - options hash ref; reads npartitions, modulus and divisor
#   @genes - gene objects providing external_name()
#
# Sorts the genes by external_name, splits them into contiguous partitions
# and writes each partition into the directory "$prefix/NNNN" (the
# file-level $prefix) via process_subgenes().  A partition whose directory
# already exists is skipped, so an interrupted run can be resumed.  When
# both modulus and divisor are defined, only partitions with
# index % divisor == modulus are handled.
#
# Dies if a partition directory cannot be created or renamed into place.
sub process_genes($@) {
    my ($opts, @genes) = @_;
    my $ngenes = scalar @genes;

    # Nothing to partition; the code below dereferences the first and last
    # gene of every partition and would die on an empty list.
    if ($ngenes == 0) {
	$logger->warn("no genes to process");
	return;
    }

    # Use a single partition when there are fewer genes than partitions,
    # or when the requested count is unusable (it would divide by zero).
    my $want = $opts->{npartitions};
    my $npart = (not defined $want or $want < 1 or $ngenes < $want) ? 1 : $want;

    @genes = sort {$a->external_name() cmp $b->external_name()} @genes;
    for my $i (0 .. $npart-1) {
	next if (defined $opts->{modulus} 
		 and defined $opts->{divisor}
		 and $i % $opts->{divisor} != $opts->{modulus});

	# Boundaries use an exact integer numerator so the last partition
	# always ends at $ngenes-1; multiplying a precomputed floating-point
	# genes-per-partition ratio can truncate one short of that.
	my $s = int($i * $ngenes / $npart);
	my $e = int(($i+1) * $ngenes / $npart) - 1;

	# Paths are passed as %s arguments so a '%' in the prefix cannot be
	# misread as a format directive.
	my $pfx = sprintf("%s/%04d", $prefix, $i);
	my @subgenes = @genes[$s..$e];
	my $msg = sprintf("subset: %s: %d/%d (%.1f%%) [%d, %d] = [%s,%s]...",
			  $pfx, ($i+1), $npart, ($i+1)/$npart*100, $s, $e,
			  $subgenes[0]->external_name(), $subgenes[$#subgenes]->external_name());
	if (-d $pfx) {
	    $logger->info($msg . "$pfx already exists; skipping");
	} else {
	    # Build in a scratch directory and rename when complete, so a
	    # directory named $pfx only ever holds a finished partition.
	    my $tpfx = $pfx . ".tmp";
	    remove_tree($tpfx);
	    mkdir($tpfx)
		or die("mkdir $tpfx: $!\n");
	    $logger->info($msg);
	    process_subgenes(\@subgenes, $tpfx);
	    rename($tpfx,$pfx)
		or die("rename $tpfx -> $pfx: $!\n");
	}
    }
}


# process_subgenes($subgenes, $prefix)
#
#   $subgenes - array ref of gene objects
#   $prefix   - existing directory to write into
#
# Writes four gzip-compressed files into $prefix: txinfo.gz, exonset.gz,
# fasta.gz and assocacs.gz.  Each is written as "<name>.tmp" and renamed to
# its final name after all genes have been handled by process1().
#
# Dies if any file cannot be opened, closed or renamed.
sub process_subgenes($$) {
    my ($subgenes,$prefix) = @_;

    my $tiw_fn = "$prefix/txinfo.gz";
    my $esw_fn = "$prefix/exonset.gz";
    my $faw_fn = "$prefix/fasta.gz";
    my $aaw_fn = "$prefix/assocacs.gz";

    # Check every open, not just the first: an unchecked failure would
    # otherwise show up later as a method call on an undefined value.
    my $tiw = IO::Zlib->new("$tiw_fn.tmp", "wb")
	or die("$tiw_fn.tmp: $!");
    my $esw = IO::Zlib->new("$esw_fn.tmp", "wb")
	or die("$esw_fn.tmp: $!");
    my $faw = IO::Zlib->new("$faw_fn.tmp", "wb")
	or die("$faw_fn.tmp: $!");
    my $aaw = IO::Zlib->new("$aaw_fn.tmp", "wb")
	or die("$aaw_fn.tmp: $!");

    # Header rows for the tab-separated outputs; the fasta file has none.
    $tiw->print( join("\t",qw(origin ac hgnc cds_se_i exons_se_i)), "\n" );
    $esw->print( join("\t",qw(tx_ac alt_ac method strand exons_se_i)), "\n" );
    $aaw->print( join("\t",qw(hgnc tx_ac pro_ac origin)), "\n" );

    my $ngenes = scalar @$subgenes;
    for my $i (0 .. $ngenes-1) {
	my $g = $subgenes->[$i];
	process1($g,$tiw,$esw,$faw,$aaw);
	$logger->info(sprintf("%d/%d (%.1f%%): gene %s\n",
			      ($i+1), $ngenes, ($i+1)/$ngenes*100,
			      $g->external_name()));
    }
    
    $tiw->close() or die("$tiw_fn.tmp: close failed");
    $esw->close() or die("$esw_fn.tmp: close failed");
    $faw->close() or die("$faw_fn.tmp: close failed");
    $aaw->close() or die("$aaw_fn.tmp: close failed");

    # Move the finished files to their final names.
    rename("$tiw_fn.tmp",$tiw_fn) or die("rename $tiw_fn.tmp: $!");
    rename("$esw_fn.tmp",$esw_fn) or die("rename $esw_fn.tmp: $!");
    rename("$faw_fn.tmp",$faw_fn) or die("rename $faw_fn.tmp: $!");
    rename("$aaw_fn.tmp",$aaw_fn) or die("rename $aaw_fn.tmp: $!");
}



# Transcript display_id => array ref of the gene names under which that
# transcript has been encountered so far in this run.  process1() uses it
# to write each transcript only once.
my %tx_seen;

# process1($g, $tiw, $esw, $faw, $aaw)
#
#   $g   - gene object
#   $tiw - output handle for txinfo rows
#   $esw - output handle for exonset rows
#   $faw - output handle for fasta records
#   $aaw - output handle for gene/transcript/protein association rows
#
# For every transcript of $g that passes the filters below, writes the
# transcript sequence (and the translation, when there is one) to $faw, an
# association row to $aaw (translated transcripts only), the exon
# coordinates to $esw and the transcript-relative exon/CDS coordinates to
# $tiw.  Start coordinates are written as start-1, ends unchanged.
#
# A transcript is skipped when it was already seen, when its biotype is
# not in $accepted_biotypes (if that filter is set), when it cannot be
# projected onto a chromosome, when its sequence region has no accession in
# %name_to_ac, or -- with --primary-only -- when that accession does not
# start with "NC_".
sub process1($$$$$) {
    my ($g,$tiw,$esw,$faw,$aaw) = @_;
    my $hgnc = $g->external_name();

    my @tx = @{ $g->get_all_Transcripts };
    $logger->info(sprintf("%d transcripts for gene %s\n",scalar(@tx),$hgnc));

    foreach my $tx (@tx) {
	if (exists $tx_seen{$tx->display_id}) {
	    my @others = sort(@{$tx_seen{$tx->display_id}});
	    $logger->warn(sprintf("gene %s: %s already seen for %d genes (%s); skipping",
				  $hgnc, $tx->display_id, scalar(@others), join(",",@others)));
	    push(@{$tx_seen{$tx->display_id}},$hgnc);
	    next;
	}
	push(@{$tx_seen{$tx->display_id}},$hgnc);

	if (defined $accepted_biotypes and not exists $accepted_biotypes->{$tx->biotype}) {
	    $logger->info(sprintf("%s: is type %s; skipping",$tx->display_id,$tx->biotype));
	    next;
	}

	# From here on the gene name is taken from the transcript's own gene
	# record; it gets its own variable rather than shadowing $hgnc.
	my $tx_hgnc = $tx->get_Gene()->external_name();

	# First projection segment only; undef when the list is empty.
	my $tx_c = $tx->project('chromosome')->[0];
	if (not defined $tx_c) {
	    $logger->error(sprintf("Can't project %s onto a chromosome; skipping",$tx->display_id));
	    next;
	}
	my $srn = $tx_c->to_Slice()->seq_region_name;
	if (not exists $name_to_ac{$srn}) {
	    $logger->warn(sprintf("gene %s, tx %s (%s): on %s, no accession available; skipping",
				  $tx_hgnc, $tx->display_id(), $g->biotype(), $srn));
	    next;
	}
	my $ac = $name_to_ac{$srn};

	# The option is declared as 'primary-only', and that is the key it
	# is stored under; the previous 'primary_only' key was never set, so
	# the filter could not be enabled.
	if ($opts->{'primary-only'} and $ac !~ m/^NC_/) {
	    $logger->warn(sprintf("gene %s, tx %s (%s): on non-chromosomal sequence %s (%s); skipping",
				  $tx_hgnc, $tx->display_id(), $g->biotype(), $ac, $srn));
	    next;
	}
	my $seq = $tx->seq->seq;
	my $pseq = $tx->translate();	# may be undef; checked before each use

	$logger->info(sprintf("%s: %s (%s; %d nt); %s\n",
			      $tx_hgnc, $tx->display_id, 
			      $tx->strand, length($seq),
			      defined($pseq) ? $pseq->display_id : $tx->biotype,
			      ));

	# write sequence and seqinfo
	$faw->print(">",$tx->display_id,"\n",$seq,"\n");
	if (defined $pseq) {
	    $faw->print(">",$pseq->display_id,"\n",$pseq->seq,"\n");
	    $aaw->print(join("\t", $tx_hgnc, $tx->display_id, $pseq->display_id, $origin), "\n");
	}

	# write exonset
	my @g_exons = @{ $tx->get_all_Exons() };
	my $g_exons_str = join(';', map( sprintf("%d,%d",$_->start()-1,$_->end()), @g_exons ));
	$esw->print( join("\t", $tx->display_id(), $ac, $method, $tx->strand(), $g_exons_str), "\n");

	# write txinfo
	my $tm = $tx->get_TranscriptMapper();
	my @c_exon_coords = map( $tm->genomic2cdna($_->start(),$_->end(),$tx->strand), @g_exons );
	my $c_exon_str = join(';', map( sprintf("%d,%d",$_->start()-1,$_->end()), @c_exon_coords ));

	# CDS bounds are only written for protein_coding transcripts; all
	# other biotypes get an empty cds_se_i column.
	my $cds_se_i = '';
	if ($tx->biotype eq 'protein_coding') {
	    $cds_se_i = sprintf("%d,%d",$tx->cdna_coding_start-1,$tx->cdna_coding_end);
	}
	$tiw->print(join("\t", $origin, $tx->display_id(), $tx_hgnc, $cds_se_i, $c_exon_str),"\n");
    }
}


## <LICENSE>
## Copyright 2014 UTA Contributors (https://bitbucket.org/biocommons/uta)
## 
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
## 
##     http://www.apache.org/licenses/LICENSE-2.0
## 
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
## </LICENSE>
